#Code to merge in policy outcomes to existing data sets

library(tidyverse)
library(readr)
library(fuzzyjoin)
library(car)
library(stringr)
library(maps)
library(data.table)
library(cdlTools)
setwd(rprojroot::find_root(rprojroot::is_rstudio_project))
getwd()
##Probably easiest to take a list of votes, merge the outcomes in, drop legislator specific info, 
#and keep respondent info which should be duplicated by vote
#But some votes appear once, some appear twice. Yearx vote is a better unique identifier 
#merge into CCES directly
df<-fread("Data/senate_vote_level_data_census.csv")
outcomes <- readxl::read_xlsx("Intermediate Data/policy_outcomes.xlsx")

df$policy_vote <- substr(df$vote,1,nchar(df$vote)-2)
outcomes$policy_vote
head(outcomes)

head(df)
colnames(outcomes)
#colnames(cces)

##CCES direct merge
cces <- read.csv("Intermediate Data/CCES_merged_policy_responsiveness_missing_roll_call_relig.csv")
keep <- c("X", "state", "cd", "fips", "female", "race", "edu", "religion", 
             "dob", "ideo", "income", "pid7", "reg_voter",
             "donate_any", "voter_ver", "vv_matched", "rcv1_r",
             "rcv2_r","rcv3_r","rcv4_r","rcv5_r","rcv6_r",
             "rcv7_r","rcv8_r","rcv9_r",  "rcv10_r", "year", "house_name", "sen1_name", "sen2_name", "house_vote", "pres_vote",
             "zip", "donate_amount", "weight")
cces <- cces[keep]
#CCES wide to long (vote/respondent level)
df2<-gather(cces, vote, outcome, rcv1_r:rcv10_r, factor_key=TRUE)
df2$year_vote <- paste0(df2$year, "_", df2$vote)

#Variable for merge
outcomes$year_vote <- paste0(outcomes$data_year,"_rcv", outcomes$data_num, "_r")
outcomes$vote_dateh <- substr(outcomes$house_vote_date, 1, 4)
outcomes$vote_dates <- substr(outcomes$senate_vote_date, 1, 4)
outcomes_small<-outcomes[c("year_vote", "became_law_dummy", "considered_house",
                           "considered_senate", "passed_house", "passed_senate",
                           "became_law", "cces_name", "vote_dateh", "vote_dates")]

df3<-left_join(df2, outcomes_small, by=c("year_vote" = "year_vote"))
#df3<-df3[df3$became_law%in%c(1,0),]

voted <- readxl::read_xlsx("Intermediate Data/policy_outcomes_and_notes.xlsx")
voted$year_vote <- paste0(voted$data_year, "_rcv", voted$data_num, "_r")
voted <- voted %>% select(year_vote, vote_before_cces, vote_after_cces)
voted$vote_after_cces <- ifelse(voted$vote_before_cces!=1, 1, 0)
df3<-left_join(df3, voted, by="year_vote")
rm(voted)
fwrite(df3, "data/policy_outcomes.csv")


policy <- df3
policy$white_r <- policy$race=="1"
policy$black_r <- policy$race=="2"
policy$latino_r <- policy$race=="3"
policy$asian_r <- policy$race=="4"
policy$other_r <- policy$race%in%c("5", "6", "7", "8", "9")


policy$first_quartile_r <- policy$income=="1"
policy$second_quartile_r <- policy$income=="2"
policy$third_quartile_r <- policy$income=="3"
policy$fourth_quartile_r <- policy$income=="4"

policy$highschool_r <- policy$edu %in% c("1", "2")
policy$somecollege_r <- policy$edu %in% c("3", "4")
policy$bachelors_r <- policy$edu %in% c("5")
policy$postgrad_r <- policy$edu %in% c("6")

dk <- policy %>%
  mutate(dk = ifelse(outcome > 1, 1, 0),
         dk = ifelse(is.na(outcome), 1, dk)) %>%
  group_by(year_vote) %>%
  summarise(perc_dk = mean(dk, na.rm=T)) 

policy <- subset(policy, outcome <= 1)

dk <- subset(dk, year_vote %in% policy$year_vote)

policy$pref_outcome <- ifelse(policy$became_law==policy$outcome, 1, 0)
policy$house_outcome <- ifelse(policy$passed_house==policy$outcome, 1, 0)
policy$senate_outcome <- ifelse(policy$passed_senate==policy$outcome, 1, 0)

policy$pref_outcome <- ifelse(policy$outcome==8, NA, policy$pref_outcome )
policy$house_outcome <- ifelse(policy$outcome==8, NA, policy$house_outcome )
policy$senate_outcome <- ifelse(policy$outcome==8, NA, policy$senate_outcome )

#Turning 99s and 98s into NAs
policy <- policy %>%
  mutate(pid7 = ifelse(pid7>7, NA, pid7),
         ideo = ifelse(ideo>5, NA, ideo),
         edu = ifelse(edu>6, NA, edu),
         income = ifelse(income>4, NA, income),
         reg_voter = ifelse(reg_voter>1, NA, reg_voter)
  )

cap_codes<-fread("Intermediate Data/Congressional Agenda/CES CAP Coding All Questions.csv")

cap_codes$vote <- paste0("rcv",cap_codes$data_num, "_r")
policy<-left_join(policy, cap_codes, b=c("year"="data_year", "vote" = "vote"))
rm(cap_codes)

policy$Bill_topic

#read election returns

fec_returns <- data.table::fread("Intermediate Data/FECreturnsGenerals04_20.csv")

#take the max vote percent of every year
fec_returns<-fec_returns %>% group_by(state, office, dist, year) %>% mutate(vote = sum(vote.g, na.rm=T))%>%
   filter(vote.g==max(vote.g)) %>%
  mutate(incumbent.voteshare = vote.g/vote)
fec_returns$merge_state <- as.character(fips(fec_returns$state))

policy$merge_year <- (policy$year.y-2002)%/%2 * 2 + 2002
#policy$temp <- as.numeric(policy$state)
#policy$merge_state <- fips(policy$state)
#policy$merge_state <- integer(policy$merge_state)
policy<-left_join(policy, fec_returns, by = c("cd" = "dist", "merge_year" = "year", "state" = "merge_state"))
getwd()
fwrite(policy, "Data/policy_outcomes_withissueareas_AM25.csv")

